RQ1: What baseline factors predict participation in TEDS?
In this script, we use data collected at Y2 to predict attrition and most of the later time points.
Analyses run in docker container bignardig/tidyverse451:v7
Updated on 8-Jan-2026 to replace household variable with derived single parent variable.
Load Data
At some time points, participants in cohorts 3 & 4 were not all eligible to take part
List of variables
Code
data.frame(
var = c(rq1x, rq1y, rq1y_twin),
labels = c(rq1x_labels, rq1y_labels, rq1y_twin_labels),
clean_labels = c(rq1x_labels_clean, rq1y_labels_clean, rq1y_twin_labels)
)Singly Impute Missing Predictor/Baseline Data
Imputation
Code
daddy_issues = df_rq1 %>%
select(adadagetw, afasoc2, afahqual)
df_rq1_imputed = df_rq1 %>%
# select(-adadagetw, -afasoc2, -afahqual) %>% # removing these eliminates imputation warnings
mice(method = "pmm", m =1)
iter imp variable
1 1 amumagetw adadagetw asingle amedtot afasoc2 afahqual amosoc2 amohqual atwmed1 aethnicc alang atwclub alookels asmoke adrink astress pollution1998pca
2 1 amumagetw adadagetw asingle amedtot afasoc2 afahqual amosoc2 amohqual atwmed1 aethnicc alang atwclub alookels asmoke adrink astress pollution1998pca
3 1 amumagetw adadagetw asingle amedtot afasoc2 afahqual amosoc2 amohqual atwmed1 aethnicc alang atwclub alookels asmoke adrink astress pollution1998pca
4 1 amumagetw adadagetw asingle amedtot afasoc2 afahqual amosoc2 amohqual atwmed1 aethnicc alang atwclub alookels asmoke adrink astress pollution1998pca
5 1 amumagetw adadagetw asingle amedtot afasoc2 afahqual amosoc2 amohqual atwmed1 aethnicc alang atwclub alookels asmoke adrink astress pollution1998pca
Code
df_rq1_imputed$loggedEventsNULL
Code
df_rq1_imputed = complete(df_rq1_imputed)
vars_to_na = c("btwdata","ctwdata", "gcdata","icdata","jcdata","pcwebdata")
df_rq1_imputed = df_rq1_imputed %>%
mutate(
across(starts_with(vars_to_na), ~ ifelse(cohort %in% c("Cohort 3: twins born Sep-95 to Aug-96","Cohort 4: twins born Sep-96 to Dec-96"), NA, .))
)
# df_rq1_imputed$gcdata1 %>% table(., useNA = "always")Create Long Imputed dataset for twin-level analyses
For imputation, because predictor variables are only measured at the family level, imputation is performed in the wide format with one row per family.
For modelling twin-level outcomes with glm(), we need to convert outcomes to a long format with one row per twin.
Code
df_rq1_imputed$famid = 1:nrow(df_rq1_imputed)
df1 = df_rq1_imputed[c("famid", rq1x, rq1y_twin1)]
df2 = df_rq1_imputed[c("famid", rq1x, rq1y_twin2)]
# Rename twin2 columns to match twin1 column names
colnames(df2) = colnames(df1)
df_rq1_long = rbind(df1, df2) %>%
arrange(famid)
rm(df1, df2)
# Set outcome to missing for participants ineligible at specific time points
df_rq1_long$btwdata1 %>% table(., useNA = "always").
<NA>
0
famid sex1 amumagetw adadagetw asingle zygos amedtot afasoc2 afahqual amosoc2 amohqual atwmed1 aethnicc alang anoldsib anyngsib atwclub alookels asmoke adrink astress pollution1998pca dtwdata1 lcwdata1 lcqdata1 pcbhdata1 rcqdata1 u1cdata1 zmhdata1 zcdata1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Correlation of participation over time
Code
df %>%
filter(twin == 1) %>%
select(starts_with(rq1y_twin)) %>%
select(matches("1$|2$")) %>%
rename_with(.fn = function(x) var_to_label(x, twinsuffix = TRUE)) %>%
rename_with(.fn = function(x) gsub(", 1Y 0N","",x)) %>%
gbtoolbox::plot_correlations(
sample_size = FALSE,
confidence_interval = FALSE
)Warning in gbtoolbox::plot_correlations(., sample_size = FALSE, confidence_interval = FALSE): This function is in development, and not yet ready for widespread use.
Proceed with caution
Code
df %>%
select(randomfamid,twin,all_of(rq1y)) %>%
filter(twin == 1) %>%
select(-twin, -randomfamid) %>%
`colnames<-`(c(rq1y_labels)) %>%
gbtoolbox::plot_correlations(
sample_size = FALSE,
confidence_interval = FALSE
) +
labs(
title = "Correlation between participation at different \ntime points",
subtitle = "N = 13020 (Number of families)"
)Warning in gbtoolbox::plot_correlations(., sample_size = FALSE, confidence_interval = FALSE): This function is in development, and not yet ready for widespread use.
Proceed with caution
Code
save_plot("1_correlation_participation", width = 9, height = 9)List of predictor variables
| rq1x | rq1x_labels |
|---|---|
| sex1 | Twin sex, 0female 1male |
| amumagetw | Age in years of natural mother at time of birth of twins |
| adadagetw | Age in years of natural father at time of birth of twins |
| asingle | Single Parent |
| zygos | Twin pair zygosity (best available estimate), 1MZ 2DZ |
| amedtot | Mother medical risk factor composite scale (1st Contact), standardised |
| afasoc2 | Father SOC employment level (1st Contact), 1-9 |
| afahqual | Male parent highest qualification level (1st Contact), see value labels |
| amosoc2 | Mother SOC employment level (1st Contact), 1-9 |
| amohqual | Female parent highest qualification level (1st Contact), see value labels |
| atwmed1 | Twin medical risk factor composite scale (1st Contact), standardised |
| aethnicc | Ethnic origin of twins, original codes (1st Contact), see value labels |
| alang | Main language spoken at home (1st Contact), see value labels |
| anoldsib | Number of older siblings (1st Contact), see value labels |
| anyngsib | Number of younger siblings (1st Contact), see value labels |
| atwclub | Member of a Twins Club (1st Contact), 1Y 0N |
| alookels | Twins looked after by anyone else (1st Contact), 1Y 0N |
| asmoke | Smoked cigarettes while pregnant (1st Contact), 1Y 0N |
| adrink | Drank alcohol while pregnant (1st Contact), 1Y 0N |
| astress | Severe stress during pregnancy (1st Contact), 1Y 0N |
| pollution1998pca | Principal Component of 1998 pollution variables |
List of outcome variables
| rq1y_twin1 | rq1y_twin_labels |
|---|---|
| dtwdata1 | Twin booklet data present (4 Year), 1Y 0N |
| lcwdata1 | Twin web data (at least one test) present at 12, 1Y 0N |
| lcqdata1 | Child questionnaire data present at 12, 1Y 0N |
| pcbhdata1 | Child behaviour booklet data present at 16, 1Y 0N |
| rcqdata1 | Twin questionnaire data present at 18, 1Y 0N |
| u1cdata1 | TEDS21 phase 1 twin qnr data present, 1Y 0N |
| zmhdata1 | TEDS26 twin MHQ data present, 1Y 0N |
| zcdata1 | CATSLife twin web study: data are present, 1Y 0N |
Twin-Level Logistic Regressions to each participation outcome
Fit Models
Fit logistic regression models to each participation outcome
Code
1 / 8
2 / 8
3 / 8
4 / 8
5 / 8
6 / 8
7 / 8
8 / 8
Calc participant participation predictions
This approach doesn’t give us standard errors for our predictions.
** Predictions for participants at cohorts 3-4 at specific timepoints are kept in here!! **
** For plotting purposes, the intercept is removed from predictions. **
Code
set.seed(1)
twinmodels_predictions = list()
for (i in seq_along(rq1y_twin)){
twinmodels_predictions[[i]] = twinmodels[[i]]$data
twinmodels_predictions[[i]]$prediction = twinmodels[[i]] %>%
predict(
.,
newdata = df_rq1_long,
type = "link",
na.action = na.pass,
) - twinmodels[[i]]$coefficients[1] # removed intercept term
}
names(twinmodels_predictions) = rq1y_twin
twinmodels_predictions = bind_rows(twinmodels_predictions, .id = "outcome")
twinmodels_predictions %>%
select(famid, outcome, prediction) %>%
mutate(
outcome = rq1y_twin_labels_clean_extrashort[match(.$outcome, rq1y_twin)]
) %>%
group_by(outcome) %>%
filter(!duplicated(famid)) %>%
pivot_wider(
values_from = "prediction",
names_from = "outcome",
id_cols = "famid"
) %>%
data.frame() %>%
select(-famid) %>%
gbtoolbox::plot_correlations(
sample_size = FALSE
) +
labs(
subtitle = "Correlation between predicted probability (type = link) across outcomes"
)Warning in gbtoolbox::plot_correlations(., sample_size = FALSE): This function is in development, and not yet ready for widespread use.
Proceed with caution
Code
save_plot("1_rq1_predicted_probability_plots", width = 8, height = 8)Significance of predictor variables
A detailed version of the figure below can be found here here.
Code
# plan(multicore, workers = ncores_use) # this won't work when rendering quarto docs
plan(multisession, workers = ncores_use)
glm_model_comparison_results = future_lapply(
twinmodels,
glm_model_comparison_robust,
future.seed = TRUE
)Code
glm_model_comparison_results_df = do.call("rbind.data.frame", glm_model_comparison_results)
glm_model_comparison_results_df$Variables_full = rq1x_labels_clean[match(glm_model_comparison_results_df$Variables_Dropped, rq1x)]
# Adjust p-values and add star
glm_model_comparison_results_df = glm_model_comparison_results_df %>%
filter(Variables_Dropped!="None") %>%
group_by(outcome) %>%
mutate(Wald_p = stats::p.adjust(Wald_p, method = "holm")) %>%
ungroup() %>%
mutate(
Wald_p_star = as.character(symnum(Wald_p, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", " ")))
)
variable_importance_order = glm_model_comparison_results_df %>%
group_by(Variables_full) %>%
summarise(mean_score = mean(Wald_p)) %>%
arrange(mean_score) %>%
pull(Variables_full)
glm_model_comparison_results_df = glm_model_comparison_results_df %>%
mutate(
Variables_full = factor(Variables_full, levels = variable_importance_order),
var_outcome = paste(Variables_Dropped, outcome, sep = "_")
)
# Plotting code
glm_model_comparison_results_df %>%
mutate(
outcome_labeled = rq1y_twin_labels_clean[match(.$outcome,rq1y_twin1)],
outcome_labeled = factor(outcome_labeled, levels = rq1y_twin_labels_clean),
# outcome_labeled = outcome_labeled %>% str_remove("data.*"),
# outcome_labeled = factor(outcome_labeled, levels = str_remove(var_to_label(rq1y),"data.*")),
criterion = Delta_AUC,
sig_level = case_when(
Wald_p < 0.001 ~ "p < 0.001",
Wald_p < 0.01 ~ "p < 0.01",
Wald_p < 0.05 ~ "p < 0.05",
Wald_p < 0.1 ~ "p < 0.1",
TRUE ~ "Not significant"
),
sig_level = factor(sig_level, levels = c("p < 0.001", "p < 0.01", "p < 0.05", "p < 0.1", "Not significant"))
) %>%
# Clean the delta_AUC variable that gets printed on the plots
mutate(
criterion2 = gsub("0\\.", ".", sprintf("%.3f", criterion)),
criterion2 = case_when(
Wald_p < 0.001 ~ paste0(criterion2,"***"),
Wald_p < 0.01 ~ paste0(criterion2,"***"),
Wald_p <= 0.05 ~ paste0(criterion2,"***"),
Wald_p > 0.05 ~ paste0(criterion2)
),
) %>%
ggplot(aes(y = Variables_full, x = criterion)) +
geom_col(aes(fill = sig_level), alpha = 0.8) +
geom_text(aes(label = criterion2), # Remove ALL 0. patterns
size = 2.5, fontface = "bold", color = "black") +
facet_wrap(~outcome_labeled, scales = "fixed", ncol = 6) +
scale_x_continuous(labels = function(x) gsub("0\\.", ".", sprintf("%.3f", x))) + # Remove ALL 0. patterns
scale_fill_manual(
values = c("p < 0.001" = "#d73027", "p < 0.01" = "#fc8d59",
"p < 0.05" = "#fee08b", "p < 0.1" = "#e0f3f8",
"Not significant" = "#d9d9d9"),
name = "Significance\n(Holm-adjusted)"
) +
labs(
x = "Change in AUC (when variable removed)",
y = NULL,
title = "Variable Importance by AUC Change",
subtitle = "Colors show Holm-adjusted significance levels"
) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 7),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
strip.text = element_text(size = 9, face = "bold"),
legend.position = "bottom"
)Code
save_plot("1_rq1_variable_importance_twin-level", width = 14, height = 7)Calc variable importance for plotting
Code
variable_importance = glm_model_comparison_results_df %>%
filter(Variables_Dropped != "None") %>%
group_by(Variables_full) %>%
summarise(
n_significant_05 = sum(Wald_p < 0.05, na.rm = TRUE),
avg_delta_auc = mean(pmax(0, Delta_AUC * -10000), na.rm = TRUE),
avg_p_value = mean(Wald_p, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(n_significant_05)) %>%
mutate(importance_rank = row_number())Circular Heatmap
Code
rq1y_twin_labels_clean_extrashort2 = paste0(rq1y_twin_labels_clean_extrashort, " ")
# Alternative circular heatmap approach
heatmap_data = glm_model_comparison_results_df %>%
filter(Variables_Dropped != "None") %>%
mutate(
outcome_labeled = rq1y_twin_labels_clean_extrashort2[match(outcome, rq1y_twin1)],
outcome_labeled = factor(outcome_labeled, levels = rq1y_twin_labels_clean_extrashort2),
Variables_wrapped = str_wrap(Variables_full, width = 8),
# Set Delta_AUC to NA if not significant, otherwise keep original negative value
# Set to grey for specific variables
Delta_AUC_fill = case_when(
Wald_p < 0.05 ~ abs(Delta_AUC),
TRUE ~ NA
)
) %>%
# Join with importance ranking from above
left_join(variable_importance, by = "Variables_full") %>%
mutate(
# Set factor levels based on importance ranking
Variables_wrapped = factor(Variables_wrapped,
levels = unique(Variables_wrapped[order(importance_rank)]))
)
# Add empty variables to create gap
n_vars = length(levels(heatmap_data$Variables_wrapped))
all_vars = c(levels(heatmap_data$Variables_wrapped))
heatmap_data$Variables_wrapped = factor(heatmap_data$Variables_wrapped, levels = all_vars)
ggplot(heatmap_data, aes(x = Variables_wrapped, y = outcome_labeled)) +
geom_tile(aes(fill = Delta_AUC_fill,
color = ifelse(Variables_Dropped %in% c("astress", "atwmed1", "adrink", "anyngsib", "alookels", "afasoc2", "adadagetw", "alang"), "azure2", "black")),
size = .5) +
scale_color_identity() +
geom_text(
aes(label = ifelse(Wald_p < 0.05,
gsub("^(-?)0\\.", "\\1.", sprintf("%.3f", Delta_AUC)), ""),
size = ifelse(outcome_labeled %in% c("Y4 "), 1.4, 2.8)
),
color = "white"
) +
scale_size_identity() +
# Add custom y-axis labels in the gap
coord_polar(theta = "x", start = 0, direction = 1) +
# scale_x_discrete(drop = FALSE, labels = function(x) ifelse(grepl("GAP_", x), "", x)) + # Hide GAP labels
scale_fill_gradient(
na.value = "white",
low = "#f48c84",
high = "#d73027",
limits = c(0, max(heatmap_data$Delta_AUC_fill, na.rm = TRUE)),
guide = "none"
) +
labs(
tag = "B",
title = "Variable Importance Heatmap",
subtitle = "Values indicate change in AUC when variable is removed from model\nRed cells are statistically significant"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, size = 11, colour = "black"),
axis.text.y = element_blank(), # Hide default y-axis labels
axis.title = element_blank(),
panel.grid = element_blank(),
plot.tag = element_text(hjust = 0, vjust = 0, size = 30, face = "bold"),
plot.tag.position = "topleft",
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 13.5, margin = margin(b = 0))
) +
{
y_positions = seq_along(levels(heatmap_data$outcome_labeled))
labels = levels(heatmap_data$outcome_labeled)
lapply(seq_along(labels), function(i) {
annotate("text", x = 0.5, y = y_positions[i],
label = labels[i], size = 2.8, hjust = 1, angle = 7)
})
}Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Code
save_plot("1_rq1_heatmap_twinparticipationpredictors", width = 8, height = 8, trim = TRUE)AUC and Variance Explained Plots
Code
model_metrics = do.call("rbind.data.frame", glm_model_comparison_results) %>%
filter(Variables_full=="None") %>%
select(-contains("Delta"),-contains("Wald")) %>%
mutate(
outcome = rq1y_twin_labels_clean_extrashort[match(.$outcome, rq1y_twin1)],
outcome = factor(outcome, levels = rq1y_twin_labels_clean_extrashort)
)
model_metricsCode
plot_data = model_metrics %>%
select(outcome, AUC, mcfad_r2) %>%
mutate(
AUC_scaled = -(AUC - 0.5),
R2_scaled = mcfad_r2 * 2.6
) %>%
pivot_longer(cols = c(AUC_scaled, R2_scaled), names_to = "metric", values_to = "value_scaled") %>%
mutate(
metric_label = case_when(
metric == "AUC_scaled" ~ "AUC",
metric == "R2_scaled" ~ "McFadden's R²"
),
original_value = ifelse(metric == "AUC_scaled", AUC, mcfad_r2),
label_value = case_when(
metric == "AUC_scaled" ~ gbtoolbox::apa_num(original_value,3),
metric == "R2_scaled" ~ paste0(gbtoolbox::apa_num(original_value*100,2),"%")
)
)
ggplot(plot_data, aes(y = outcome, x = value_scaled, fill = metric_label)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_text(aes(label = label_value),
hjust = ifelse(plot_data$metric == "AUC_scaled", 1.1, -0.1),
size = 2.9) +
scale_fill_manual(values = c("AUC" = "#5e3c99", "McFadden's R²" = "#e66101")) +
labs(
tag = "A",
title = "Participation Prediction Accuracy",
# subtitle = "Accuracy in predicting \nparticipation at each Time Point",
x = NULL,
y = NULL,
fill = NULL
) +
theme_minimal() +
coord_cartesian(clip = "off") +
theme(
legend.position = "bottom",
axis.text.y = element_text(size = 10.5, angle = 0, margin = margin(r = 10, unit = "pt"), color = "black"),
# panel.grid.major.x = element_line(color = "grey90"),
panel.grid = element_blank(),
plot.margin = margin(t = 20, r = 20, b = 50, l = 20, unit = "pt"),
axis.text.x = element_blank(),
plot.tag = element_text(hjust = 0, vjust = 0, size = 30, face = "bold"),
plot.tag.position = "topleft",
plot.title = element_text(hjust = 0.5, size = 16)
) +
geom_vline(xintercept = 0, color = "black", size = 0.5)Code
save_plot("1_rq1_twinlevel_prediction_accuracy", width = 5, height = 8, trim = TRUE)Twin-Level Logistic Regression Coefficients
Code
# Extract all coefficients from twin models and combine
twinmodels_results0 = do.call(rbind, lapply(1:length(twinmodels), function(i) {
vcov_robust = clubSandwich::vcovCR(twinmodels[[i]],
cluster = twinmodels[[i]]$data$famid,
type = "CR2")
coefs = clubSandwich::conf_int(twinmodels[[i]], vcov = vcov_robust, test = "naive-tp")
coefs$outcome = rq1y_twin1[i]
return(coefs)
}))Registered S3 method overwritten by 'clubSandwich':
method from
bread.mlm sandwich
Code
twinmodels_results = twinmodels_results0 %>%
data.frame() %>%
dplyr::rename(term = "Coef") %>%
`rownames<-`(NULL)
# # Extract all coefficients from twin models and combine
# twinmodels_results = do.call(rbind, lapply(1:length(twinmodels), function(i) {
# coefs = broom::tidy(twinmodels[[i]], conf.int = FALSE) %>%
# select(-std.error, -statistic, -p.value)
# coefs$outcome = rq1y_twin1[i]
# return(coefs)
# }))
twinmodels_results$var = str_extract(twinmodels_results$term, paste0("^(", paste(rq1x, collapse = "|"), ")"))
twinmodels_results$var[twinmodels_results0$Coef == "(Intercept)"] = "Intercept"
twinmodels_results$var_outcome = paste(twinmodels_results$var,twinmodels_results$outcome, sep = "_")
# Bonferonni-holm corrected p-value
twinmodels_results$Wald_p = glm_model_comparison_results_df$Wald_p[match(twinmodels_results$var_outcome,glm_model_comparison_results_df$var_outcome)]
twinmodels_results %>%
mutate(
outcome = rq1y_twin_labels_clean_extrashort[match(.$outcome, rq1y_twin1)],
term = case_when(
term == "(Intercept)" ~ "Intercept",
term == "sex1" ~ "Twin Sex (Male)",
term == "amumagetw" ~ "Mother age at birth",
term == "adadagetw" ~ "Father age at birth",
term == "aadultscohabiting biological parent with other" ~ "Household Type: Cohabiting parent",
term == "asinglesingle parent" ~ "Single Parent (ref: cohabiting parents)",
term == "zygos2" ~ "Zygosity (DZ)",
term == "amedtot" ~ "Mother medical risk",
str_starts(term, "afasoc2") & str_detect(term, "^afasoc2\\d$") ~ paste0("Father employment: Level ", str_sub(term, -1)),
term == "afasoc2caring for children at home" ~ "Father employment: Caring for children",
term == "afasoc2no job" ~ "Father employment: No job",
term == "afahqualno qualifications" ~ "Father education: No qualifications",
term == "afahqualCSE grade 2-5 or O-level/GCSE grade D-G" ~ "Father education: CSE/GCSE D-G",
term == "afahqualA-level or S-level" ~ "Father education: A-level",
term == "afahqualHNC" ~ "Father education: HNC",
term == "afahqualHND" ~ "Father education: HND",
term == "afahqualundergraduate degree" ~ "Father education: Undergraduate",
term == "afahqualpostgraduate qualification" ~ "Father education: Postgraduate",
str_starts(term, "amosoc2") & str_detect(term, "^amosoc2\\d$") ~ paste0("Mother employment: Level ", str_sub(term, -1)),
term == "amosoc2no job" ~ "Mother employment: No job",
term == "amohqualCSE grade 2-5 or O-level/GCSE grade D-G" ~ "Mother education: CSE/GCSE D-G",
term == "amohqualCSE grade 1 or O-level/GCSE grade A-C" ~ "Mother education: CSE/GCSE A-C",
term == "amohqualA-level or S-level" ~ "Mother education: A-level",
term == "amohqualHNC" ~ "Mother education: HNC",
term == "amohqualHND" ~ "Mother education: HND",
term == "amohqualundergraduate degree" ~ "Mother education: Undergraduate",
term == "amohqualpostgraduate qualification" ~ "Mother education: Postgraduate",
term == "atwmed1" ~ "Twin medical risk",
term == "aethniccAsian" ~ "Ethnic origin: Asian",
term == "aethniccBlack" ~ "Ethnic origin: Black",
term == "aethniccMixed race" ~ "Ethnic origin: Mixed race",
term == "aethniccOther" ~ "Ethnic origin: Other",
term == "alangEnglish" ~ "Language: English only",
term == "alangEnglish + other" ~ "Language: English + other",
str_starts(term, "anoldsib") & str_detect(term, "\\d+$") ~ paste0("Older siblings: ", str_extract(term, "\\d+")),
term == "anoldsib5 or more" ~ "Older siblings: 5+",
term == "anyngsib1" ~ "Younger siblings: 1",
term == "anyngsib2 or more" ~ "Younger siblings: 2+",
term == "atwclub1" ~ "Twins club member",
term == "alookels1" ~ "Childcare by others",
term == "asmoke1" ~ "Smoking in pregnancy",
term == "adrink1" ~ "Alcohol in pregnancy",
term == "astress1" ~ "Severe stress in pregnancy",
term == "cens01pop98density" ~ "Population density",
term == "pollution1998pca" ~ "Pollution index",
TRUE ~ term
)
) %>%
mutate(
beta = gbtoolbox::apa_num(beta, n_decimal_places = 3),
CI_L = gbtoolbox::apa_num(CI_L, n_decimal_places = 3),
CI_U = gbtoolbox::apa_num(CI_U, n_decimal_places = 3),
SE = gbtoolbox::apa_num(SE, n_decimal_places = 3),
Wald_p_clean = gbtoolbox::apa_num(Wald_p, n_decimal_places = 3),
table_val = paste0(beta,"\n[",CI_L,",\n",CI_U,"]\np=",Wald_p_clean,"\nSE=", SE = SE)
) %>%
select(term, table_val, Wald_p, outcome) %>%
pivot_wider(
id_cols = term,
values_from = c("table_val", "Wald_p"),
names_from = outcome
) %>%
gt() %>%
tab_style(
style = cell_text(whitespace = "pre-wrap"),
locations = cells_body(columns = starts_with("table_val"))
) %>%
# tab_style(
# style = cell_fill(color = "#d5e8d4"),
# locations = cells_body(columns = "table_val_Y2", rows = Wald_p_Y2 < 0.05)
# ) %>%
# tab_style(
# style = cell_fill(color = "#d5e8d4"),
# locations = cells_body(columns = "table_val_Y3", rows = Wald_p_Y3 < 0.05)
# ) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y4", rows = Wald_p_Y4 < 0.05)
) %>%
# tab_style(
# style = cell_fill(color = "#d5e8d4"),
# locations = cells_body(columns = "table_val_Y7", rows = Wald_p_Y7 < 0.05)
# ) %>%
# tab_style(
# style = cell_fill(color = "#d5e8d4"),
# locations = cells_body(columns = "table_val_Y9", rows = Wald_p_Y9 < 0.05)
# ) %>%
# tab_style(
# style = cell_fill(color = "#d5e8d4"),
# locations = cells_body(columns = "table_val_Y10", rows = Wald_p_Y10 < 0.05)
# ) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y12 (web test)", rows = `Wald_p_Y12 (web test)` < 0.05)
) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y12 (q'aire)", rows = `Wald_p_Y12 (q'aire)` < 0.05)
) %>%
# tab_style(
# style = cell_fill(color = "#d5e8d4"),
# locations = cells_body(columns = "table_val_Y16 (web)", rows = `Wald_p_Y16 (web)` < 0.05)
# ) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y16 (q'aire)", rows = `Wald_p_Y16 (q'aire)` < 0.05)
) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y18", rows = Wald_p_Y18 < 0.05)
) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y21", rows = Wald_p_Y21 < 0.05)
) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y26 (q'aire)", rows = `Wald_p_Y26 (q'aire)` < 0.05)
) %>%
tab_style(
style = cell_fill(color = "#d5e8d4"),
locations = cells_body(columns = "table_val_Y26 (web test)", rows = `Wald_p_Y26 (web test)` < 0.05)
) %>%
cols_hide(columns = starts_with("Wald_p")) %>%
cols_label_with(
fn = function(x) str_remove(x, "table_val_")
) %>%
tab_options(
table.width = "100%",
table.font.size = "12px"
) %>%
cols_width(
term ~ px(90),
starts_with("table_val") ~ px(40)
) %>%
tab_source_note(
source_note = "Note: Confidence intervals calculated using cluster robust standard errors clustered by family, calculated using the clubSandwich R package with CR2 (bias-reduced linearization) adjustment and naive-tp small sample adjustment. Cluster robust Wald tests were used to calculate using the ClubSandwich R package. Note that for dummy variables, Wald tests were performed by constraining all dummy variables in a given set to zero - therefore dummy variables derived from the same categorical variable have the same p-values."
) | term | Y4 | Y12 (web test) | Y12 (q'aire) | Y16 (q'aire) | Y18 | Y21 | Y26 (q'aire) | Y26 (web test) |
|---|---|---|---|---|---|---|---|---|
| Intercept | -1.193 [-1.642, -.745] p= SE= .229 | -1.688 [-2.129, -1.247] p= SE= .225 | -1.631 [-2.079, -1.183] p= SE= .229 | -1.658 [-2.119, -1.197] p= SE= .235 | -1.495 [-1.946, -1.044] p= SE= .230 | -1.363 [-1.794, -.933] p= SE= .220 | -1.207 [-1.620, -.794] p= SE= .211 | -2.256 [-2.787, -1.724] p= SE= .271 |
| Twin Sex (Male) | -.109 [-.182, -.036] p= .042 SE= .037 | -.231 [-.302, -.160] p= .000 SE= .036 | -.182 [-.254, -.110] p= .000 SE= .037 | -.393 [-.467, -.319] p= .000 SE= .038 | -.253 [-.326, -.181] p= .000 SE= .037 | -.682 [-.752, -.613] p= .000 SE= .035 | -.739 [-.806, -.672] p= .000 SE= .034 | -.688 [-.776, -.601] p= .000 SE= .045 |
| Mother age at birth | .035 [ .025, .046] p= .000 SE= .005 | .027 [ .017, .037] p= .000 SE= .005 | .028 [ .018, .039] p= .000 SE= .005 | .031 [ .021, .042] p= .000 SE= .005 | .047 [ .037, .058] p= .000 SE= .005 | .014 [ .004, .024] p= .050 SE= .005 | .014 [ .004, .023] p= .073 SE= .005 | .012 [-.000, .025] p= .471 SE= .006 |
| Father age at birth | .010 [ .002, .019] p= .113 SE= .004 | .005 [-.003, .013] p= .735 SE= .004 | .004 [-.004, .012] p=1.000 SE= .004 | -.001 [-.009, .007] p=1.000 SE= .004 | .005 [-.003, .013] p=1.000 SE= .004 | .009 [ .002, .017] p= .168 SE= .004 | .007 [-.001, .014] p= .795 SE= .004 | .008 [-.002, .017] p= .857 SE= .005 |
| Single Parent (ref: cohabiting parents) | -.412 [-.571, -.253] p= .000 SE= .081 | -.415 [-.580, -.249] p= .000 SE= .084 | -.351 [-.516, -.185] p= .001 SE= .085 | -.554 [-.733, -.376] p= .000 SE= .091 | -.447 [-.609, -.285] p= .000 SE= .083 | -.607 [-.773, -.441] p= .000 SE= .085 | -.455 [-.614, -.297] p= .000 SE= .081 | -.475 [-.694, -.255] p= .000 SE= .112 |
| Zygosity (DZ) | -.160 [-.239, -.081] p= .001 SE= .040 | -.343 [-.419, -.266] p= .000 SE= .039 | -.323 [-.401, -.246] p= .000 SE= .040 | -.286 [-.366, -.207] p= .000 SE= .040 | -.311 [-.390, -.233] p= .000 SE= .040 | -.290 [-.364, -.215] p= .000 SE= .038 | -.271 [-.343, -.199] p= .000 SE= .037 | -.329 [-.421, -.238] p= .000 SE= .047 |
| Mother medical risk | -.025 [-.070, .021] p=1.000 SE= .023 | -.043 [-.087, .001] p= .395 SE= .023 | -.021 [-.066, .024] p=1.000 SE= .023 | -.054 [-.101, -.008] p= .206 SE= .024 | -.082 [-.127, -.038] p= .003 SE= .023 | -.021 [-.063, .022] p=1.000 SE= .022 | -.030 [-.072, .011] p= .924 SE= .021 | -.034 [-.088, .019] p=1.000 SE= .027 |
| Father employment: Level 2 | .138 [-.002, .278] p= .923 SE= .071 | .092 [-.040, .223] p= .444 SE= .067 | .045 [-.088, .178] p=1.000 SE= .068 | .105 [-.028, .239] p= .927 SE= .068 | .138 [ .000, .275] p= .052 SE= .070 | .125 [-.001, .252] p= .378 SE= .065 | .113 [-.008, .233] p= .795 SE= .061 | .148 [ .002, .295] p=1.000 SE= .075 |
| Father employment: Level 3 | -.078 [-.227, .071] p= .923 SE= .076 | .187 [ .044, .331] p= .444 SE= .073 | .101 [-.044, .246] p=1.000 SE= .074 | .055 [-.094, .203] p= .927 SE= .076 | .067 [-.081, .215] p= .052 SE= .076 | .079 [-.060, .218] p= .378 SE= .071 | .055 [-.080, .189] p= .795 SE= .069 | .111 [-.054, .276] p=1.000 SE= .084 |
| Father employment: Level 4 | .037 [-.164, .238] p= .923 SE= .102 | .245 [ .049, .442] p= .444 SE= .100 | .178 [-.021, .377] p=1.000 SE= .101 | .160 [-.044, .364] p= .927 SE= .104 | .046 [-.154, .246] p= .052 SE= .102 | .115 [-.077, .306] p= .378 SE= .098 | .119 [-.068, .305] p= .795 SE= .095 | .157 [-.081, .395] p=1.000 SE= .121 |
| Father employment: Level 5 | -.048 [-.166, .071] p= .923 SE= .060 | .009 [-.107, .124] p= .444 SE= .059 | -.020 [-.136, .097] p=1.000 SE= .059 | -.097 [-.217, .024] p= .927 SE= .061 | -.104 [-.221, .013] p= .052 SE= .060 | -.114 [-.226, -.001] p= .378 SE= .057 | -.079 [-.189, .030] p= .795 SE= .056 | -.040 [-.184, .104] p=1.000 SE= .074 |
| Father employment: Level 6 | -.097 [-.273, .078] p= .923 SE= .090 | .134 [-.036, .303] p= .444 SE= .087 | .101 [-.071, .273] p=1.000 SE= .088 | .146 [-.029, .322] p= .927 SE= .090 | .043 [-.129, .215] p= .052 SE= .088 | -.031 [-.196, .135] p= .378 SE= .085 | .062 [-.099, .222] p= .795 SE= .082 | .113 [-.090, .317] p=1.000 SE= .104 |
| Father employment: Level 7 | -.230 [-.455, -.005] p= .923 SE= .115 | .211 [-.012, .433] p= .444 SE= .113 | .139 [-.089, .367] p=1.000 SE= .116 | -.050 [-.286, .186] p= .927 SE= .120 | .010 [-.212, .231] p= .052 SE= .113 | -.077 [-.299, .144] p= .378 SE= .113 | -.044 [-.248, .160] p= .795 SE= .104 | .027 [-.239, .293] p=1.000 SE= .136 |
| Father employment: Level 8 | .022 [-.127, .172] p= .923 SE= .076 | .032 [-.115, .180] p= .444 SE= .075 | -.065 [-.215, .084] p=1.000 SE= .076 | .012 [-.141, .166] p= .927 SE= .078 | -.075 [-.223, .072] p= .052 SE= .075 | -.044 [-.187, .099] p= .378 SE= .073 | .006 [-.132, .143] p= .795 SE= .070 | -.068 [-.259, .122] p=1.000 SE= .097 |
| Father employment: Level 9 | .075 [-.123, .273] p= .923 SE= .101 | .134 [-.063, .330] p= .444 SE= .100 | .126 [-.072, .324] p=1.000 SE= .101 | .049 [-.158, .256] p= .927 SE= .106 | .130 [-.066, .325] p= .052 SE= .100 | -.051 [-.245, .142] p= .378 SE= .099 | -.075 [-.263, .113] p= .795 SE= .096 | .005 [-.248, .258] p=1.000 SE= .129 |
| Father employment: Caring for children | .010 [-.255, .276] p= .923 SE= .135 | .016 [-.243, .275] p= .444 SE= .132 | .026 [-.238, .289] p=1.000 SE= .135 | .014 [-.262, .289] p= .927 SE= .140 | -.202 [-.462, .058] p= .052 SE= .133 | -.120 [-.384, .143] p= .378 SE= .134 | -.128 [-.386, .130] p= .795 SE= .132 | .026 [-.303, .355] p=1.000 SE= .168 |
| Father employment: No job | -.006 [-.182, .169] p= .923 SE= .090 | -.000 [-.178, .177] p= .444 SE= .091 | -.012 [-.192, .168] p=1.000 SE= .092 | -.087 [-.274, .100] p= .927 SE= .096 | -.247 [-.425, -.068] p= .052 SE= .091 | -.185 [-.359, -.010] p= .378 SE= .089 | -.177 [-.349, -.006] p= .795 SE= .087 | -.043 [-.269, .183] p=1.000 SE= .116 |
| Father education: No qualifications | -.184 [-.315, -.053] p= .099 SE= .067 | -.209 [-.341, -.076] p= .059 SE= .067 | -.156 [-.291, -.022] p= .043 SE= .068 | -.171 [-.311, -.030] p= .206 SE= .072 | -.184 [-.315, -.054] p= .091 SE= .067 | -.183 [-.313, -.054] p= .023 SE= .066 | -.210 [-.337, -.083] p= .443 SE= .065 | -.191 [-.366, -.017] p= .024 SE= .089 |
| Father education: CSE/GCSE D-G | -.100 [-.219, .018] p= .099 SE= .061 | -.096 [-.214, .022] p= .059 SE= .060 | -.064 [-.184, .056] p= .043 SE= .061 | -.105 [-.229, .019] p= .206 SE= .063 | -.078 [-.196, .040] p= .091 SE= .060 | -.131 [-.247, -.014] p= .023 SE= .059 | -.068 [-.180, .044] p= .443 SE= .057 | -.043 [-.193, .108] p= .024 SE= .077 |
| Father education: A-level | -.153 [-.281, -.024] p= .099 SE= .066 | -.136 [-.261, -.010] p= .059 SE= .064 | -.115 [-.243, .012] p= .043 SE= .065 | -.032 [-.164, .099] p= .206 SE= .067 | -.070 [-.198, .057] p= .091 SE= .065 | -.038 [-.162, .085] p= .023 SE= .063 | .007 [-.112, .126] p= .443 SE= .061 | .017 [-.138, .172] p= .024 SE= .079 |
| Father education: HNC | .060 [-.106, .227] p= .099 SE= .085 | .013 [-.146, .173] p= .059 SE= .081 | .093 [-.068, .255] p= .043 SE= .082 | .076 [-.089, .240] p= .206 SE= .084 | .113 [-.050, .276] p= .091 SE= .083 | .002 [-.151, .156] p= .023 SE= .078 | -.000 [-.150, .149] p= .443 SE= .076 | .021 [-.173, .215] p= .024 SE= .099 |
| Father education: HND | -.120 [-.296, .057] p= .099 SE= .090 | .049 [-.119, .217] p= .059 SE= .086 | -.024 [-.195, .147] p= .043 SE= .087 | .040 [-.133, .213] p= .206 SE= .088 | -.074 [-.246, .098] p= .091 SE= .088 | .021 [-.143, .185] p= .023 SE= .084 | .015 [-.144, .174] p= .443 SE= .081 | .071 [-.129, .271] p= .024 SE= .102 |
| Father education: Undergraduate | .036 [-.112, .185] p= .099 SE= .076 | .099 [-.043, .240] p= .059 SE= .072 | .168 [ .025, .311] p= .043 SE= .073 | .136 [-.009, .282] p= .206 SE= .074 | .094 [-.051, .240] p= .091 SE= .074 | .164 [ .029, .300] p= .023 SE= .069 | .064 [-.067, .194] p= .443 SE= .067 | .268 [ .106, .429] p= .024 SE= .083 |
| Father education: Postgraduate | -.137 [-.311, .038] p= .099 SE= .089 | -.003 [-.168, .163] p= .059 SE= .084 | .053 [-.115, .222] p= .043 SE= .086 | .144 [-.027, .315] p= .206 SE= .087 | -.078 [-.250, .094] p= .091 SE= .088 | .148 [-.011, .307] p= .023 SE= .081 | .063 [-.089, .216] p= .443 SE= .078 | .269 [ .079, .459] p= .024 SE= .097 |
| Mother employment: Level 1 | -.294 [-.458, -.130] p= .047 SE= .084 | -.009 [-.164, .146] p= .000 SE= .079 | .031 [-.127, .190] p= .001 SE= .081 | -.042 [-.202, .117] p= .000 SE= .081 | .120 [-.041, .281] p= .000 SE= .082 | .029 [-.121, .179] p= .004 SE= .076 | .028 [-.116, .172] p= .126 SE= .073 | -.011 [-.192, .169] p=1.000 SE= .092 |
| Mother employment: Level 2 | -.096 [-.288, .095] p= .047 SE= .098 | .177 [ .003, .352] p= .000 SE= .089 | .071 [-.105, .247] p= .001 SE= .090 | .042 [-.132, .217] p= .000 SE= .089 | .193 [ .005, .381] p= .000 SE= .096 | .079 [-.088, .245] p= .004 SE= .085 | -.018 [-.174, .138] p= .126 SE= .080 | -.094 [-.284, .095] p=1.000 SE= .097 |
| Mother employment: Level 3 | -.035 [-.196, .125] p= .047 SE= .082 | .325 [ .176, .474] p= .000 SE= .076 | .299 [ .146, .452] p= .001 SE= .078 | .289 [ .137, .441] p= .000 SE= .078 | .449 [ .289, .608] p= .000 SE= .081 | .243 [ .103, .384] p= .004 SE= .072 | .218 [ .084, .353] p= .126 SE= .069 | .066 [-.101, .234] p=1.000 SE= .086 |
| Mother employment: Level 4 | -.075 [-.201, .052] p= .047 SE= .065 | .039 [-.084, .163] p= .000 SE= .063 | -.031 [-.155, .094] p= .001 SE= .064 | -.033 [-.161, .095] p= .000 SE= .065 | .083 [-.043, .208] p= .000 SE= .064 | -.050 [-.172, .072] p= .004 SE= .062 | -.020 [-.138, .098] p= .126 SE= .060 | -.125 [-.280, .029] p=1.000 SE= .079 |
| Mother employment: Level 5 | -.153 [-.571, .265] p= .047 SE= .213 | .188 [-.207, .583] p= .000 SE= .201 | -.030 [-.429, .368] p= .001 SE= .203 | .019 [-.412, .449] p= .000 SE= .220 | .081 [-.338, .500] p= .000 SE= .214 | .067 [-.313, .448] p= .004 SE= .194 | .272 [-.076, .621] p= .126 SE= .178 | .101 [-.349, .552] p=1.000 SE= .230 |
| Mother employment: Level 6 | -.141 [-.302, .020] p= .047 SE= .082 | .118 [-.041, .277] p= .000 SE= .081 | .065 [-.096, .226] p= .001 SE= .082 | .015 [-.149, .179] p= .000 SE= .084 | -.052 [-.212, .108] p= .000 SE= .082 | -.079 [-.233, .074] p= .004 SE= .078 | -.020 [-.171, .131] p= .126 SE= .077 | -.188 [-.388, .012] p=1.000 SE= .102 |
| Mother employment: Level 7 | -.186 [-.366, -.007] p= .047 SE= .091 | -.004 [-.181, .174] p= .000 SE= .091 | -.088 [-.267, .092] p= .001 SE= .091 | -.226 [-.415, -.037] p= .000 SE= .097 | -.214 [-.393, -.036] p= .000 SE= .091 | -.204 [-.381, -.026] p= .004 SE= .090 | -.111 [-.283, .060] p= .126 SE= .088 | -.074 [-.301, .154] p=1.000 SE= .116 |
| Mother employment: Level 8 | -.052 [-.415, .311] p= .047 SE= .185 | -.047 [-.415, .320] p= .000 SE= .188 | -.194 [-.571, .183] p= .001 SE= .192 | -.334 [-.748, .080] p= .000 SE= .211 | -.373 [-.732, -.014] p= .000 SE= .183 | -.342 [-.727, .043] p= .004 SE= .196 | -.083 [-.441, .276] p= .126 SE= .183 | -.181 [-.665, .302] p=1.000 SE= .247 |
| Mother employment: Level 9 | .028 [-.225, .281] p= .047 SE= .129 | .215 [-.037, .467] p= .000 SE= .129 | .158 [-.099, .414] p= .001 SE= .131 | -.084 [-.354, .187] p= .000 SE= .138 | .041 [-.215, .296] p= .000 SE= .130 | .066 [-.182, .314] p= .004 SE= .127 | .085 [-.149, .319] p= .126 SE= .119 | .016 [-.298, .330] p=1.000 SE= .160 |
| Mother employment: No job | -.206 [-.319, -.094] p= .047 SE= .057 | -.172 [-.286, -.058] p= .000 SE= .058 | -.212 [-.327, -.096] p= .001 SE= .059 | -.279 [-.401, -.157] p= .000 SE= .062 | -.189 [-.302, -.076] p= .000 SE= .057 | -.163 [-.275, -.051] p= .004 SE= .057 | -.122 [-.231, -.012] p= .126 SE= .056 | -.157 [-.305, -.009] p=1.000 SE= .075 |
| Mother education: CSE/GCSE D-G | .301 [ .147, .454] p= .000 SE= .078 | .211 [ .046, .376] p= .000 SE= .084 | .206 [ .040, .372] p= .000 SE= .085 | .198 [ .021, .375] p= .000 SE= .090 | .292 [ .132, .452] p= .000 SE= .082 | .242 [ .078, .406] p= .000 SE= .084 | .140 [-.019, .300] p= .000 SE= .081 | .018 [-.209, .244] p= .000 SE= .116 |
| Mother education: CSE/GCSE A-C | .438 [ .296, .580] p= .000 SE= .072 | .499 [ .349, .650] p= .000 SE= .077 | .428 [ .276, .579] p= .000 SE= .077 | .422 [ .260, .584] p= .000 SE= .083 | .546 [ .399, .693] p= .000 SE= .075 | .468 [ .318, .618] p= .000 SE= .077 | .342 [ .196, .489] p= .000 SE= .075 | .300 [ .095, .505] p= .000 SE= .105 |
| Mother education: A-level | .463 [ .297, .629] p= .000 SE= .085 | .647 [ .475, .819] p= .000 SE= .088 | .592 [ .419, .765] p= .000 SE= .088 | .612 [ .430, .794] p= .000 SE= .093 | .690 [ .520, .859] p= .000 SE= .087 | .588 [ .418, .757] p= .000 SE= .087 | .524 [ .359, .689] p= .000 SE= .084 | .521 [ .298, .744] p= .000 SE= .114 |
| Mother education: HNC | .704 [ .445, .963] p= .000 SE= .132 | .717 [ .463, .972] p= .000 SE= .130 | .587 [ .330, .844] p= .000 SE= .131 | .540 [ .277, .803] p= .000 SE= .134 | .731 [ .476, .986] p= .000 SE= .130 | .612 [ .370, .855] p= .000 SE= .124 | .503 [ .264, .742] p= .000 SE= .122 | .416 [ .100, .731] p= .000 SE= .161 |
| Mother education: HND | .561 [ .329, .793] p= .000 SE= .118 | .767 [ .539, .996] p= .000 SE= .117 | .717 [ .484, .950] p= .000 SE= .119 | .495 [ .255, .735] p= .000 SE= .123 | .751 [ .520, .982] p= .000 SE= .118 | .441 [ .216, .666] p= .000 SE= .115 | .323 [ .108, .537] p= .000 SE= .109 | .290 [ .003, .577] p= .000 SE= .146 |
| Mother education: Undergraduate | .765 [ .573, .957] p= .000 SE= .098 | .829 [ .639, 1.019] p= .000 SE= .097 | .766 [ .574, .958] p= .000 SE= .098 | .741 [ .542, .941] p= .000 SE= .102 | .887 [ .694, 1.079] p= .000 SE= .098 | .752 [ .566, .938] p= .000 SE= .095 | .692 [ .512, .871] p= .000 SE= .091 | .502 [ .264, .740] p= .000 SE= .122 |
| Mother education: Postgraduate | .956 [ .717, 1.195] p= .000 SE= .122 | .709 [ .483, .934] p= .000 SE= .115 | .631 [ .403, .860] p= .000 SE= .117 | .609 [ .374, .844] p= .000 SE= .120 | .815 [ .584, 1.046] p= .000 SE= .118 | .750 [ .531, .970] p= .000 SE= .112 | .690 [ .479, .900] p= .000 SE= .107 | .568 [ .298, .839] p= .000 SE= .138 |
| Twin medical risk | -.003 [-.044, .039] p=1.000 SE= .021 | -.031 [-.071, .009] p= .664 SE= .021 | -.041 [-.082, -.000] p= .381 SE= .021 | -.022 [-.064, .019] p=1.000 SE= .021 | -.013 [-.054, .028] p=1.000 SE= .021 | -.008 [-.047, .031] p=1.000 SE= .020 | .002 [-.036, .039] p=1.000 SE= .019 | .052 [ .005, .100] p= .345 SE= .024 |
| Ethnic origin: Asian | .052 [-.258, .362] p= .031 SE= .158 | .087 [-.227, .401] p= .005 SE= .160 | .126 [-.193, .444] p= .042 SE= .163 | .171 [-.151, .493] p= .018 SE= .164 | .262 [-.056, .579] p= .004 SE= .162 | .347 [ .053, .641] p= .000 SE= .150 | .016 [-.266, .297] p= .001 SE= .144 | .402 [ .078, .726] p= .002 SE= .165 |
| Ethnic origin: Black | -.524 [-.819, -.230] p= .031 SE= .150 | -.453 [-.766, -.141] p= .005 SE= .159 | -.509 [-.825, -.193] p= .042 SE= .161 | -.580 [-.923, -.236] p= .018 SE= .175 | -.311 [-.613, -.010] p= .004 SE= .154 | -.523 [-.839, -.207] p= .000 SE= .161 | -.529 [-.820, -.239] p= .001 SE= .148 | -.864 [-1.366, -.361] p= .002 SE= .256 |
| Ethnic origin: Mixed race | -.139 [-.354, .077] p= .031 SE= .110 | -.397 [-.621, -.174] p= .005 SE= .114 | -.257 [-.481, -.033] p= .042 SE= .114 | -.279 [-.513, -.044] p= .018 SE= .120 | -.341 [-.563, -.119] p= .004 SE= .113 | -.383 [-.603, -.163] p= .000 SE= .112 | -.385 [-.598, -.173] p= .001 SE= .108 | -.281 [-.556, -.006] p= .002 SE= .140 |
| Ethnic origin: Other | -.396 [-.843, .050] p= .031 SE= .228 | -.125 [-.597, .347] p= .005 SE= .241 | .065 [-.395, .526] p= .042 SE= .235 | .027 [-.466, .520] p= .018 SE= .252 | -.493 [-.989, .003] p= .004 SE= .253 | -.159 [-.628, .310] p= .000 SE= .239 | .140 [-.327, .608] p= .001 SE= .238 | .224 [-.333, .780] p= .002 SE= .284 |
| Language: English only | .241 [-.064, .546] p= .324 SE= .156 | .260 [-.041, .562] p= .059 SE= .154 | .281 [-.028, .590] p= .077 SE= .158 | .266 [-.049, .581] p=1.000 SE= .161 | -.105 [-.415, .206] p=1.000 SE= .158 | .259 [-.031, .548] p= .498 SE= .148 | .078 [-.199, .355] p=1.000 SE= .141 | .137 [-.214, .489] p= .857 SE= .179 |
| Language: English + other | -.112 [-.482, .258] p= .324 SE= .189 | -.232 [-.615, .152] p= .059 SE= .196 | -.199 [-.590, .193] p= .077 SE= .200 | .040 [-.367, .447] p=1.000 SE= .207 | -.074 [-.452, .303] p=1.000 SE= .193 | -.021 [-.386, .344] p= .498 SE= .186 | -.181 [-.536, .173] p=1.000 SE= .181 | -.230 [-.691, .231] p= .857 SE= .235 |
| Older siblings: 1 | -.204 [-.289, -.118] p= .000 SE= .044 | -.018 [-.101, .064] p= .003 SE= .042 | -.053 [-.137, .030] p= .007 SE= .043 | -.074 [-.159, .012] p= .014 SE= .044 | -.132 [-.216, -.048] p= .000 SE= .043 | -.093 [-.174, -.013] p= .005 SE= .041 | -.112 [-.190, -.035] p= .061 SE= .040 | -.117 [-.217, -.018] p= .125 SE= .051 |
| Older siblings: 2 | -.289 [-.407, -.171] p= .000 SE= .060 | -.097 [-.212, .019] p= .003 SE= .059 | -.165 [-.282, -.047] p= .007 SE= .060 | -.176 [-.298, -.054] p= .014 SE= .062 | -.295 [-.412, -.178] p= .000 SE= .060 | -.179 [-.292, -.066] p= .005 SE= .058 | -.196 [-.307, -.085] p= .061 SE= .057 | -.107 [-.248, .035] p= .125 SE= .072 |
| Older siblings: 3 | -.564 [-.747, -.380] p= .000 SE= .094 | -.223 [-.415, -.032] p= .003 SE= .098 | -.257 [-.450, -.064] p= .007 SE= .098 | -.303 [-.509, -.098] p= .014 SE= .105 | -.414 [-.605, -.222] p= .000 SE= .098 | -.282 [-.470, -.093] p= .005 SE= .096 | -.173 [-.355, .008] p= .061 SE= .093 | -.342 [-.598, -.087] p= .125 SE= .130 |
| Older siblings: 4 | -.525 [-.875, -.174] p= .000 SE= .179 | -.673 [-1.068, -.277] p= .003 SE= .202 | -.505 [-.882, -.128] p= .007 SE= .192 | -.586 [-.997, -.174] p= .014 SE= .210 | -.517 [-.886, -.147] p= .000 SE= .188 | -.559 [-.931, -.188] p= .005 SE= .189 | -.045 [-.398, .308] p= .061 SE= .180 | -.278 [-.757, .202] p= .125 SE= .245 |
| Older siblings: 5+ | -1.061 [-1.652, -.470] p= .000 SE= .301 | -1.074 [-1.792, -.356] p= .003 SE= .366 | -.858 [-1.532, -.185] p= .007 SE= .344 | -.404 [-1.071, .262] p= .014 SE= .340 | -.990 [-1.637, -.343] p= .000 SE= .330 | -.371 [-.951, .208] p= .005 SE= .296 | -.387 [-.991, .217] p= .061 SE= .308 | -1.221 [-2.308, -.135] p= .125 SE= .554 |
| Younger siblings: 1 | -.187 [-.398, .025] p= .936 SE= .108 | -.200 [-.412, .012] p= .709 SE= .108 | -.248 [-.466, -.029] p= .588 SE= .111 | -.121 [-.341, .099] p=1.000 SE= .112 | -.134 [-.347, .079] p=1.000 SE= .109 | -.128 [-.336, .081] p=1.000 SE= .106 | -.142 [-.346, .062] p= .900 SE= .104 | -.037 [-.290, .215] p=1.000 SE= .129 |
| Younger siblings: 2+ | -.232 [-.949, .485] p= .936 SE= .366 | -.073 [-.747, .600] p= .709 SE= .344 | -.045 [-.762, .672] p= .588 SE= .366 | -.232 [-.955, .491] p=1.000 SE= .369 | -.086 [-.708, .536] p=1.000 SE= .317 | -.443 [-1.083, .197] p=1.000 SE= .326 | -.584 [-1.340, .172] p= .900 SE= .386 | -1.089 [-2.313, .135] p=1.000 SE= .625 |
| Twins club member | .215 [ .133, .298] p= .000 SE= .042 | .318 [ .240, .396] p= .000 SE= .040 | .276 [ .197, .356] p= .000 SE= .040 | .308 [ .228, .388] p= .000 SE= .041 | .248 [ .168, .329] p= .000 SE= .041 | .315 [ .239, .391] p= .000 SE= .039 | .243 [ .170, .316] p= .000 SE= .037 | .280 [ .189, .371] p= .000 SE= .047 |
| Childcare by others | -.136 [-.244, -.027] p= .114 SE= .055 | -.038 [-.144, .068] p= .967 SE= .054 | -.059 [-.166, .049] p=1.000 SE= .055 | -.047 [-.158, .063] p=1.000 SE= .056 | .130 [ .022, .239] p= .151 SE= .056 | -.018 [-.121, .084] p=1.000 SE= .052 | .020 [-.079, .120] p=1.000 SE= .051 | .028 [-.099, .154] p=1.000 SE= .064 |
| Smoking in pregnancy | -.222 [-.323, -.120] p= .000 SE= .052 | -.147 [-.250, -.044] p= .059 SE= .052 | -.227 [-.332, -.123] p= .000 SE= .053 | -.263 [-.373, -.153] p= .000 SE= .056 | -.116 [-.218, -.014] p= .182 SE= .052 | -.153 [-.254, -.052] p= .035 SE= .051 | -.088 [-.186, .010] p= .795 SE= .050 | -.169 [-.299, -.039] p= .132 SE= .066 |
| Alcohol in pregnancy | -.013 [-.099, .073] p=1.000 SE= .044 | .087 [ .005, .170] p= .303 SE= .042 | .064 [-.020, .147] p= .812 SE= .043 | .007 [-.079, .092] p=1.000 SE= .044 | .036 [-.048, .121] p=1.000 SE= .043 | -.053 [-.134, .027] p=1.000 SE= .041 | -.051 [-.128, .027] p=1.000 SE= .040 | -.100 [-.199, -.001] p= .471 SE= .050 |
| Severe stress in pregnancy | -.018 [-.114, .077] p=1.000 SE= .049 | -.023 [-.117, .071] p= .967 SE= .048 | .014 [-.081, .110] p=1.000 SE= .049 | .026 [-.072, .124] p=1.000 SE= .050 | .005 [-.089, .099] p=1.000 SE= .048 | .010 [-.082, .102] p=1.000 SE= .047 | .007 [-.082, .096] p=1.000 SE= .045 | -.029 [-.142, .085] p=1.000 SE= .058 |
| Pollution index | -.102 [-.141, -.064] p= .000 SE= .020 | -.053 [-.091, -.016] p= .059 SE= .019 | -.048 [-.086, -.010] p= .118 SE= .019 | -.056 [-.095, -.017] p= .053 SE= .020 | -.094 [-.132, -.056] p= .000 SE= .019 | -.050 [-.086, -.013] p= .075 SE= .019 | -.062 [-.097, -.027] p= .008 SE= .018 | -.084 [-.129, -.039] p= .003 SE= .023 |
| Note: Confidence intervals calculated using cluster robust standard errors clustered by family, calculated using the clubSandwich R package with CR2 (bias-reduced linearization) adjustment and naive-tp small sample adjustment. Cluster robust Wald tests were used to calculate using the ClubSandwich R package. Note that for dummy variables, Wald tests were performed by constraining all dummy variables in a given set to zero - therefore dummy variables derived from the same categorical variable have the same p-values. | ||||||||
Plot Marginal Effects of maternal education
Code
# Calculate average predictions for maternal education variables
# Calculate predictions for all models
all_predictions = map_dfr(1:length(twinmodels), function(i) {
data_grid = marginaleffects::datagrid(
model = twinmodels[[i]],
amohqual = unique
)
pred = predict(twinmodels[[i]], newdata = data_grid, type = "response")
cbind.data.frame(data_grid, estimate = pred) %>%
mutate(
outcome = rq1y_twin_labels_clean_extrashort[i],
amohqual = factor(amohqual, levels = levels(df$amohqual))
)
})
# Plot all predictions together
all_predictions %>%
mutate(
outcome = factor(outcome, levels = rq1y_twin_labels_clean_extrashort)
) %>%
ggplot(aes(x = amohqual, y = estimate, group = outcome, color = outcome)) +
geom_point() +
geom_line() +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = "Predicted Participation by Maternal Education Level",
subtitle = "Across all study timepoints",
x = "Maternal Education Level",
y = "Predicted Probability",
color = "Study Wave",
caption = "Note: Predictions calculated using logistic regression models fitted separately for each study wave.\nAll other covariates held at their observed means (continuous) or modes (categorical)."
)Descriptive Statistics
Total number of families in dataset: 13945
Total number of families with no predictor information (and are just completely removed from the analysis): 0
Participation rates at each timepoint
Code
dtwdata1 lcwdata1 lcqdata1 pcbhdata1 rcqdata1 u1cdata1 zmhdata1 zcdata1
0.6001152 0.4415899 0.4401306 0.3807988 0.5290323 0.3747312 0.3263057 0.1547235
Code
df %>%
select(all_of(rq1y_twin1)) %>%
mutate(across(everything(), as.character)) %>%
# rename_from_labels() %>%
pivot_longer(cols = everything()) %>%
group_by(name) %>%
count(value) %>%
# mutate(name_count = n()) %>%
# filter(name_count < 10) %>%
# select(-name_count) %>%
knitr::kable()| name | value | n |
|---|---|---|
| dtwdata1 | 0 | 10413 |
| dtwdata1 | 1 | 15627 |
| lcqdata1 | 0 | 14579 |
| lcqdata1 | 1 | 11461 |
| lcwdata1 | 0 | 14541 |
| lcwdata1 | 1 | 11499 |
| pcbhdata1 | 0 | 16124 |
| pcbhdata1 | 1 | 9916 |
| rcqdata1 | 0 | 12264 |
| rcqdata1 | 1 | 13776 |
| u1cdata1 | 0 | 16282 |
| u1cdata1 | 1 | 9758 |
| zcdata1 | 0 | 22011 |
| zcdata1 | 1 | 4029 |
| zmhdata1 | 0 | 17543 |
| zmhdata1 | 1 | 8497 |
Code
df %>%
select(all_of(rq1y_twin1)) %>%
`colnames<-`(rq1y_twin_labels_clean_extrashort) %>%
# select(where(is.numeric)) %>%
mutate(across(everything(), as.character)) %>%
mutate(across(everything(), ~as.character(replace_na(., "NA")))) %>%
pivot_longer(cols = everything()) %>%
mutate(name = factor(name, levels = rq1y_twin_labels_clean_extrashort)) %>%
ggplot(aes(x=value)) +
geom_bar() +
facet_wrap(~name, ncol = 4, scales = "free")Predictor variables (rq1x)
Distribution
Code
df_rq1x %>%
select(!where(is.numeric)) %>%
rename_from_labels() %>%
mutate(across(everything(), as.character)) %>%
pivot_longer(cols = everything()) %>%
ggplot(aes(x=value)) +
geom_bar() +
facet_wrap(~name, ncol = 4, scales = "free") +
theme(
axis.text.x = element_text(
angle = 20,
hjust = 1,
vjust = 1, # Middle vertical alignment
size = 8 # Adjust text size if needed
))Missingness
Code
df_rq1x %>%
mutate(across(everything(), ~ as.numeric(!is.na(.)))) %>%
pivot_longer(
cols = everything()
) %>%
group_by(name) %>%
summarise(percent_notmissing = mean(value)) %>%
knitr::kable()| name | percent_notmissing |
|---|---|
| adadagetw | 0.8824885 |
| adrink | 0.9900922 |
| aethnicc | 0.9963902 |
| afahqual | 0.8852535 |
| afasoc2 | 0.8877112 |
| alang | 0.9860983 |
| alookels | 0.9520737 |
| amedtot | 0.9932412 |
| amohqual | 0.9842550 |
| amosoc2 | 0.9850230 |
| amumagetw | 0.9816436 |
| anoldsib | 1.0000000 |
| anyngsib | 1.0000000 |
| asingle | 0.9827957 |
| asmoke | 0.9958525 |
| astress | 0.9942396 |
| atwclub | 0.9656682 |
| atwmed1 | 0.9921659 |
| pollution1998pca | 0.8841782 |
| sex1 | 1.0000000 |
| zygos | 1.0000000 |
Missing Data Patterns
Code
df_rq1x %>%
`colnames<-`((rq1x_labels)) %>%
as_tibble() %>%
ggmice::plot_pattern(., rotate = TRUE)Code
save_plot("1_rq1_misssing_data_pattern", width = 12, height = 30)
df_rq1_imputed %>%
select(rq1x) %>%
data.frame() %>%
`colnames<-`((rq1x_labels)) %>%
as_tibble() %>%
ggmice::plot_pattern(., rotate = TRUE)Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(rq1x)
# Now:
data %>% select(all_of(rq1x))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
/\ /\
{ `---' }
{ O O }
==> V <== No need for mice. This data set is completely observed.
\ \|/ /
`-----'
Code
save_plot("1_rq1_misssing_data_pattern_imputed", width = 12, height = 30)Predicting missingness of SDQ/MFQ data
Data Prep
1 = Not missing
Note that at Yr16, the emotion subtest was not administered to parents, so the conduct one is used instead.
Code
list_of_sdq_var = c(
# "bsdqcemot1",
# "csdqcemot1",
# "dsdqemot1",
# "gpsdqemot1",
# "icsdqemot1", "ipsdqemot1",
"lcsdqemot1", "lpsdqemot1",
"pcbhsdqemot1", "ppbhsdqcont1",
"u1csdqemot1", "ucv1sdqemot1", "u1psdqemot1",
"zmhsdqemot1"
)
rq1m = c(
"lcsdqemot1", "lpsdqemot1", "lcmfqt1", "lpmfqt1",
"pcbhsdqemot1", "ppbhsdqcont1", "pcbhmfqt1", "ppbhmfqt1",
"u1csdqemot1", "ucv1sdqemot1", "u1psdqemot1", "u1cmfqt1", "ucv1mfqt1",
"zmhsdqemot1", "zmhmfqt1"
)
rq1m_childmeasures = c(
"lcsdqemot1", "lcmfqt1",
"pcbhsdqemot1", "pcbhmfqt1",
"u1csdqemot1", "ucv1sdqemot1", "u1cmfqt1", "ucv1mfqt1",
"zmhsdqemot1", "zmhmfqt1"
)
rq1m_parentmeasures = c(
"lpsdqemot1", "lpmfqt1",
"ppbhsdqcont1","ppbhmfqt1",
"u1psdqemot1"
)
missing_outcomes = df %>%
filter(acontact == 1) %>%
select(all_of(rq1m)) %>%
sapply(.,function(x) as.numeric(!is.na(x)))
# missing_outcomes = sapply(xx,function(x) as.numeric(is.na(x)))Descriptives
Code
Code
# Check how concordant missingness is between twins
df %>%
filter(acontact == 1) %>%
select("randomfamid", "twin", all_of(rq1m)) %>%
# rename_from_labels() %>%
pivot_wider(id_cols = randomfamid, values_from = rq1m, names_from = twin) %>%
mutate(
across(
ends_with("1") | ends_with("2"),
~as.numeric(!is.na(.))
)
) %>%
select(-randomfamid) %>%
gbtoolbox::plot_correlations(
confidence_interval = FALSE,
sample_size = FALSE
)Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use.
Proceed with caution
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(rq1m)
# Now:
data %>% select(all_of(rq1m))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
Code
df %>%
filter(acontact == 1) %>%
select("randomfamid", "twin", all_of(rq1m_childmeasures)) %>%
# rename_from_labels() %>%
pivot_wider(id_cols = randomfamid, values_from = rq1m_childmeasures, names_from = twin) %>%
mutate(
across(
ends_with("1") | ends_with("2"),
~as.numeric(!is.na(.))
)
) %>%
gbtoolbox::plot_correlations(
confidence_interval = FALSE,
sample_size = FALSE
) +
labs(title = "Child Measures Only")Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use.
Proceed with caution
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(rq1m_childmeasures)
# Now:
data %>% select(all_of(rq1m_childmeasures))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
Code
cbind.data.frame(df)